import torchvision

# ==========================train=============================================
# Pascal VOC 2012 detection dataset, "train" split, built at import time.
# Samples are returned untransformed because every transform hook is None.
Train_data = torchvision.datasets.VOCDetection(
    root="./VOCDetection",  # relative path: resolved against the current working directory
    year="2012",
    image_set="train",
    # No download is attempted; presumably the dataset must already be
    # present under `root` -- confirm against the torchvision docs.
    download=False,
    transform=None,
    target_transform=None,
    transforms=None,
)

# ==========================val=============================================
# Validation_data = torchvision.datasets.VOCDetection(root='./VOCDetection',
#                                                     year='2012',
#                                                     image_set='val',
#                                                     download=True,
#                                                     transform=None,
#                                                     target_transform=None,
#                                                     transforms=None)
if __name__ == '__main__':
    # Smoke check: report the dataset size, then dump a single sample.
    print(len(Train_data))
    i = 5
    # Fetch the sample once. Every `Train_data[i]` lookup runs the dataset's
    # __getitem__ again (for VOCDetection that means reloading the image and
    # re-parsing the annotation XML), so indexing three times tripled the work.
    image, target = Train_data[i]
    print(image)
    annotation = target['annotation']
    print(annotation)
    print(annotation['object'])

